---
title: "Anthromes 12K Discrete Global Grid System: DGG Creation and Data Extraction"
author: "Nicolas Gauthier"
date: "Last knit on: `r Sys.Date()`"
output:
  pdf_document: 
    toc: yes
    latex_engine: xelatex
    highlight: pygments
  html_document: default
---

# Setup

Load **dggridR** and **sf** for calculating the Discrete Global Grid cell geometries, **raster** and **exactextractr** for extracting raster data at the DGG cell locations, and **tidyverse** for data summaries and visualization. Also load this package, **anthromes** for additional analysis and plotting functions.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dggridR)
library(sf)
library(raster)
library(exactextractr)
library(tidyverse)
devtools::load_all()
```

# Discrete Global Grid System
Generate an Icosahedral Snyder Equal Area Aperture 3 hexagonal grid. Use the Level 12 resolution for ~100km grid cells.
```{r}
dggs <- dgconstruct(res = 12)
```

Load a a set of DGG IDs we can use as a land mask. These are based on the intersection of the HYDE 3.2 land mask and previously generated Level 12 DGG grids from GLOBE. We use these pre-computed IDs as a land mask for simplicity and to avoid the computational effort of calculating DGG geometries over the ocean.
```{r}
dgg_ids <- read_csv('data/raw_data/dgg_ids.csv', col_types = 'ii') # read ids as integers
```

Create polygons using the DGG system and cell numbers from the HYDE land mask.
```{r, cache = TRUE}
dgg_land <- dgcellstogrid(dggs, dgg_ids$ANL12_ID, 
                          frame = FALSE, 
                          wrapcells = FALSE) %>%
  st_as_sf(crs = 4326) %>%
  bind_cols(dgg_ids) %>%
  # split polygons at the international dateline for better plotting
  st_wrap_dateline(options = c("WRAPDATELINE=YES", "DATELINEOFFSET=30"))
```

# HYDE 3.2
## Fixed Inputs

Load the HYDE 3.2 fixed input rasters including land area, potential vegetation, potential villages, and world region data. Use **exact_extract()** to aggregate the input rasters to the DGG geometries. This function weights the calculations by the coverage fraction of each raster grid cell by each DGG cell. For land area, calculate the total weighted land area in each DGG cell. For the other variables, calculate the modal value in each weighted DGG cell. Because the limiting computational step is the calculation of the cell coverage fractions, its easier to calculate the sum and mode for all layers simultaneously and just select the desired outputs.
```{r}
fixed_inputs <- c('data/raw_data/supporting_5m_grids/maxln_cr.tif',
                  'data/raw_data/supporting_5m_grids/potveg15.tif',
                  'data/raw_data/supporting_5m_grids/potvill20.tif',
                  'data/raw_data/supporting_5m_grids/simple_regions.tif',
                  'data/raw_data/supporting_5m_grids/iso_cr.tif'
                  ) %>%
  stack() %>%
  exact_extract(dgg_land, c('sum', 'mode'), progress = FALSE) %>%
  dplyr::select(land_area = sum.maxln_cr, 
                pot_veg =  mode.potveg15, 
                pot_vill = mode.potvill20, 
                region = mode.simple_regions, 
                country = mode.iso_cr) %>%
  bind_cols(dgg_ids, .)
```


## Land Use and Population

Loop through each variable name, import HYDE rasters from each year, combine them in a brick, and extract by DGG polygon. See the documentation for the *hyde2dgg()* function for more details. This will take a long time.
```{r}
# list desired HYDE variable names
hyde_names <- c('cropland', 'grazing', 'ir_rice', 'popc', 'tot_irri', 'uopp')
  
hyde_dgg <- hyde_names %>%
  setNames(hyde_names) %>% # name the vector so the row bind is cleaner
  # change this and the following paths to point to the HYDE data on your machine
  map_dfr(hyde2dgg, dgg_land, '/Volumes/Data/baseline/zip', .id = 'var')
```

Repeat for Upper and Lower uncertainty estimates. 
```{r}
hyde_dgg_lower <- hyde_names %>%
  setNames(hyde_names) %>% 
  # change this path to point to the HYDE data on your machine
  map_dfr(hyde2dgg, dgg_land, '/Volumes/Data/lower/zip', .id = 'var')
```

```{r}
hyde_dgg_upper <- hyde_names %>%
  setNames(hyde_names) %>% 
  # change this path to point to the HYDE data on your machine
  map_dfr(hyde2dgg, dgg_land, '/Volumes/Data/upper/zip', .id = 'var')
```

# Natural and Modified Habitat

Extract the Natural and Modified Habitats layer using a custom function for calculating the coverage fraction of each of the NMH "potential natural" and "likely natural" categories.
```{r}
nmh <- raster('data/raw_data/WCMC_natural_modified_habitat_screening_layer/natural_modified_habitat_screening_layer.tif') %>%
  exact_extract(dgg_land, function(value, frac) tapply(frac, value, sum) / sum(frac),
                progress = FALSE) %>% 
  map(~{if(length(.) == 0) c(`1` = NA, `2` = NA, `3` = NA, `4` = NA) else .}) %>% 
  map_dfr(as.list) %>%
  replace_na(list(`1` = 0, `2` = 0, `3` = 0, `4` = 0)) %>% # turn NAs into zeros
  transmute(L1_ID = dgg_land$L1_ID, 
            ANL12_ID = dgg_land$ANL12_ID, 
            nmh3 = `3`, nmh4 = `4`)
```

# Three Global Conditions Data

Load the Three Global Conditions data and associated data layers. These are already in DGG format, so just extract the data and cell IDs.
```{r}
tgc <- read_csv('data/raw_data/three_conditions_v4_rep_data.csv') %>% 
  dplyr::select(l1_id, biome, `@3_cond_v4`, hfp_max, 
                pa_km2, kba_km2, ind_cnt, v_rich, v_thr) %>%
  rename(olson_biome = biome, L1_ID = l1_id, tgc = `@3_cond_v4`) %>%
  # round the count variables to whole numbers
  mutate(hfp_max = round(hfp_max), v_rich = round(v_rich), v_thr = round(v_thr)) 
```

```{r}
# fyi, there are some empty cells from the TGC data that are present in NMH
contemp_vars <- left_join(nmh, tgc, by = 'L1_ID')
```

# Check and Save the Results

```{r}
# plot the outputs to check
sf_use_s2(FALSE)
dgg_land %>%
  left_join(fixed_inputs) %>%
st_crop(st_bbox(c(xmin = 30, xmax = 40, ymin = 30, ymax = 40))) %>% # comment out for whole world
  ggplot() +
  geom_sf(aes(fill = potveg15), color = NA) +
  scale_fill_viridis_c() +
  coord_sf(crs = st_crs("+proj=eck4"))
sf_use_s2(TRUE)
```

```{r}
saveRDS(hyde_dgg, 'data/derived_data/hyde_dgg')
saveRDS(hyde_dgg_upper, 'data/derived_data/hyde_dgg_upper')
saveRDS(hyde_dgg_lower, 'data/derived_data/hyde_dgg_lower')
```


```{r}
write_sf(dgg_land, 'data/derived_data/dgg_land.shp')
write_csv(fixed_inputs, 'data/derived_data/fixed_inputs.csv')
write_csv(contemp_vars, 'data/derived_data/contemp_vars.csv')
```
